package crawler.richan;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zql.entity.AgencyEntity;

import dao.AgencyDao;
import util.Location;
import util.MybatisTool;
import util.TianYanCha;
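// Maintenance note, 1.4-1.31. The stack trace below is from a failed run (read timeout during the Jsoup GET in getAgency).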
/*Exception in thread "main" java.lang.RuntimeException: 爬虫异常
	at crawler.richan.ZhengZhouRiChanCrawler2.getAgency(ZhengZhouRiChanCrawler2.java:100)
	at crawler.richan.ZhengZhouRiChanCrawler2.main(ZhengZhouRiChanCrawler2.java:167)
Caused by: java.net.SocketTimeoutException: Read timed out
	at java.net.SocketInputStream.socketRead0(Native Method)
	at java.net.SocketInputStream.socketRead(Unknown Source)
	at java.net.SocketInputStream.read(Unknown Source)
	at java.net.SocketInputStream.read(Unknown Source)
	at java.io.BufferedInputStream.read1(Unknown Source)
	at java.io.BufferedInputStream.read(Unknown Source)
	at sun.net.www.http.ChunkedInputStream.fastRead(Unknown Source)
	at sun.net.www.http.ChunkedInputStream.read(Unknown Source)
	at java.io.FilterInputStream.read(Unknown Source)
	at sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(Unknown Source)
	at sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(Unknown Source)
	at org.jsoup.helper.DataUtil.readToByteBuffer(DataUtil.java:163)
	at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:710)
	at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:629)
	at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:261)
	at org.jsoup.helper.HttpConnection.get(HttpConnection.java:250)
	at crawler.richan.ZhengZhouRiChanCrawler2.getAgency(ZhengZhouRiChanCrawler2.java:38)
	... 1 more
*/
/**
 * Crawls the dealer-inquiry pages of www.zznissan.com.cn and turns every dealer
 * found into an {@link AgencyEntity}.
 *
 * <p>The crawl walks three levels: the province {@code <select>} on the inquiry
 * page, the city list returned for each province, and the dealer list returned
 * for each province/city pair.
 */
public class ZhengZhouRiChanCrawler2 {
	private static final String SITE_HOST = "www.zznissan.com.cn";
	private static final String SITE_ORIGIN = "http://www.zznissan.com.cn";
	private static final String DEALER_PAGE_URL = SITE_ORIGIN + "/index.php/support/dealer_inquiry";
	private static final String CITY_URL = SITE_ORIGIN + "/index.php/support/ajax_city_bypid";
	private static final String DEALER_URL = SITE_ORIGIN + "/index.php/support/ajax_jxs_point";
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36";

	/** Maximum time to wait for a TCP connection, in milliseconds. */
	private static final int CONNECT_TIMEOUT_MS = 30000;
	/**
	 * Maximum time to wait for response data, in milliseconds. Generous because the
	 * site has produced read timeouts before, but finite so a stalled server cannot
	 * hang the crawl indefinitely.
	 */
	private static final int READ_TIMEOUT_MS = 120000;

	/**
	 * Crawls every province, city and dealer exposed by the dealer-inquiry page.
	 *
	 * @return one entity per dealer returned by the site; never {@code null}
	 * @throws RuntimeException wrapping the underlying cause if any request, parse
	 *         or lookup step fails
	 */
	public List<AgencyEntity> getAgency() {
		List<AgencyEntity> list = new ArrayList<AgencyEntity>();
		try {
			Document proDoc = Jsoup.connect(DEALER_PAGE_URL)
					.header("Host", SITE_HOST)
					.header("Origin", SITE_ORIGIN)
					.header("Referer", DEALER_PAGE_URL)
					.timeout(READ_TIMEOUT_MS)
					.get();
			Element proSelect = proDoc.getElementById("province_id");
			if (proSelect == null) {
				throw new IllegalStateException("element #province_id not found at " + DEALER_PAGE_URL);
			}
			Elements proOptions = proSelect.getElementsByTag("option");
			// Index 0 is skipped on purpose — presumably a placeholder option; confirm against the page.
			for (int i = 1; i < proOptions.size(); i++) {
				Element option = proOptions.get(i);
				String proName = option.text();
				String proNum = option.attr("value");

				// Fetch the cities of this province.
				JSONArray cityArray = postForJsonArray(CITY_URL, "pid=" + proNum);
				for (int j = 0; j < cityArray.size(); j++) {
					JSONObject city = cityArray.getJSONObject(j);
					String cityName = city.getString("city_name");
					String cityNum = city.getString("city_id");

					// Fetch the dealers of this province/city pair.
					JSONArray dealers = postForJsonArray(DEALER_URL, "province=" + proNum + "&" + "city=" + cityNum);
					for (int k = 0; k < dealers.size(); k++) {
						list.add(toAgency(dealers.getJSONObject(k), proName, cityName));
					}
				}
			}
		} catch (Exception e) {
			throw new RuntimeException("爬虫异常", e);
		}
		return list;
	}

	/**
	 * Sends a form-style POST and returns the response body.
	 *
	 * @param path  absolute URL to post to
	 * @param param request body, e.g. {@code "province=1810&city=1896"}
	 * @return the response body with its lines concatenated (line terminators
	 *         removed), or {@code null} if the request failed; the failure is
	 *         printed to standard error
	 */
	public String getJsonPost(String path, String param) {
		try {
			return postForText(path, param);
		} catch (IOException e) {
			// Kept for callers that rely on the null-on-failure contract.
			e.printStackTrace();
			return null;
		}
	}

	/** Posts {@code param} to {@code path} and parses the response as a JSON array. */
	private JSONArray postForJsonArray(String path, String param) throws IOException {
		JSONArray parsed = JSON.parseArray(postForText(path, param));
		// A null parse result is treated as "no entries" so callers can iterate safely.
		return parsed != null ? parsed : new JSONArray();
	}

	/** Performs the POST and returns the body; I/O failures propagate to the caller. */
	private String postForText(String path, String param) throws IOException {
		HttpURLConnection conn = (HttpURLConnection) new URL(path).openConnection();
		conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
		conn.setReadTimeout(READ_TIMEOUT_MS);
		conn.setDoOutput(true); // a request body is written
		conn.setDoInput(true); // the response body is read
		conn.setUseCaches(false);
		conn.setRequestMethod("POST");
		conn.setRequestProperty("Connection", "Keep-Alive");
		conn.setRequestProperty("user-agent", USER_AGENT);
		conn.setRequestProperty("Host", SITE_HOST);
		conn.setRequestProperty("Origin", SITE_ORIGIN);
		conn.setRequestProperty("Referer", DEALER_PAGE_URL);

		// getOutputStream() connects implicitly. Writing raw bytes (rather than via
		// PrintWriter) lets write errors surface as IOException.
		// String.valueOf keeps the earlier behaviour of sending "null" for a null param.
		try (OutputStream out = conn.getOutputStream()) {
			out.write(String.valueOf(param).getBytes(StandardCharsets.UTF_8));
		}

		// Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
		StringBuilder content = new StringBuilder();
		try (BufferedReader br = new BufferedReader(
				new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
			String line;
			while ((line = br.readLine()) != null) {
				content.append(line);
			}
		}
		return content.toString();
	}

	/** Builds the entity for one dealer JSON object, including coordinate and shareholder lookups. */
	private AgencyEntity toAgency(JSONObject dealer, String proName, String cityName) {
		String name = dealer.getString("dealer_name");
		String address = dealer.getString("dealer_address");
		String sellTell = dealer.getString("dealer_tel");
		String serviceTell = dealer.getString("dealer_fwtel");

		// Index 0 is used as longitude and index 1 as latitude.
		String[] lngAndLat = Location.getLocation(address);
		// Index 0 is used as the controlling shareholder, index 1 as the other shareholders.
		List<String> shareholder = TianYanCha.getShareholder(name);

		AgencyEntity agency = new AgencyEntity();
		agency.setdCloseDate(null);
		agency.setdOpeningDate(null);
		agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
		agency.setnBrandID(-1);
		agency.setsBrand("日产");

		agency.setnDealerIDWeb(-1);
		agency.setnManufacturerID(-1);
		agency.setsManufacturer("郑州日产");

		agency.setnState(1);
		agency.setsAddress(address);
		agency.setsCity(cityName);
		agency.setsCustomerServiceCall(serviceTell);
		agency.setsDealerName(name);
		agency.setsDealerType(null);
		agency.setsProvince(proName);
		agency.setsSaleCall(sellTell);
		// A missing or short lookup result leaves the field null instead of aborting the whole crawl.
		agency.setsLongitude(elementAt(lngAndLat, 0));
		agency.setsLatitude(elementAt(lngAndLat, 1));
		agency.setsControllingShareholder(elementAt(shareholder, 0));
		agency.setsOtherShareholders(elementAt(shareholder, 1));
		return agency;
	}

	/** Returns {@code values[index]}, or {@code null} when the array is null or too short. */
	private static String elementAt(String[] values, int index) {
		return values != null && index < values.length ? values[index] : null;
	}

	/** Returns {@code values.get(index)}, or {@code null} when the list is null or too short. */
	private static String elementAt(List<String> values, int index) {
		return values != null && index < values.size() ? values.get(index) : null;
	}

	/**
	 * Runs the crawl and stores every dealer through {@link MybatisTool}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		ZhengZhouRiChanCrawler2 crawler = new ZhengZhouRiChanCrawler2();
		System.out.println("爬虫开始...");
		List<AgencyEntity> agencys = crawler.getAgency();
		System.out.println("抓取完毕,正在存库");
		for (AgencyEntity agency : agencys) {
			MybatisTool.save(agency);
		}
		MybatisTool.close();
		System.out.println("请查看数据库");
	}

}
